RSA functionality

# speaker informativity
# ---------------------
speaker.inf = function(d, alpha, cost = 0) {
  # Speaker utility for an utterance with literal-listener probability d:
  # exp(alpha * (log(d) - cost)). Vectorized over d.
  utility = log(d) - cost
  exp(alpha * utility)
}
# speaker likelihood
# ------------------
speaker.lhd = function(rating, degree, m, alpha) {
  # P_speaker(degree | rating): informativity-weighted (softmax) choice of
  # `degree` among all alternative utterances (the columns of m).
  # m[rating, degree] is the literal listener value for this (rating, degree).
  numerator = speaker.inf(m[rating, degree], alpha)
  # speaker.inf is vectorized, so normalize over the whole row at once
  # instead of sapply-ing over it column by column.
  normalize = sum(speaker.inf(unlist(m[rating, ]), alpha))
  return(numerator / normalize)
}
# non-normalized posterior
# -----------------------
nn.post = function(rating, degree, m, alpha, useprior) {
  # Unnormalized posterior: speaker likelihood weighted by the rating prior.
  # NOTE(review): `useprior` is accepted but never consulted here; the prior
  # always comes from the global `priors` table -- confirm this is intentional.
  p = priors[rating, "prior.p"]
  speaker.lhd(rating, degree, m, alpha) * p
}
# normalized posterior
# --------------------
norm.post = function(rating, degree, m, alpha, useprior) {
  # Normalized posterior P(rating | degree): renormalize nn.post over all
  # candidate ratings (rows of m).
  # Generalized: was hard-coded seq(1, 5); seq_len(nrow(m)) gives identical
  # results for the 5-star matrices used here but works for any rating count.
  nn = nn.post(rating, degree, m, alpha, useprior)
  normalize = sum(vapply(seq_len(nrow(m)),
                         function(i) nn.post(i, degree, m, alpha, useprior),
                         numeric(1)))
  return(nn / normalize)
}

Run model functionality

# run.partial()
# ------------
# Run RSA with model1 (entailment) and model2 (entailment + generic)
run.partial = function(d, alpha=1, useprior=F, usenone=F, normalize=F) {
  # Build the literal-listener matrix: one row per star rating, one column
  # per degree term (hi / low).
  mat = d %>%
    select(stars, degree, speaker.p) %>%
    spread(degree, speaker.p) %>%
    select(hi, low)

  # Optionally renormalize each utterance column to sum to 1.
  if (normalize) {
    mat = mat %>%
      mutate(hi = hi / sum(hi),
             low = low / sum(low))
  }

  # Optionally add a generic "None" alternative with all mass on 1 star.
  if (usenone) {
    mat$none = c(1, 0, 0, 0, 0)
  }

  # Pragmatic listener prediction for every (stars, degree) row of d.
  d$pred = round(as.numeric(mapply(norm.post, d$stars, d$degree,
                                   MoreArgs = list(m = mat,
                                                   alpha = alpha,
                                                   useprior = useprior))),
                 digits=4)

  return(d)
}

# run.full()
# ------------
# Run RSA with model3 (full) with alternatives
run.full = function(d, alpha=1, useprior=F, usenone=F, addMid=F, normalize=F) {
  # Run RSA with the full alternative set. Previously this had four copies of
  # the same pipeline (addMid x normalize); collapsed into one path driven by
  # the selected column set. Behavior is unchanged.
  # alpha = scales.entropy[scales.entropy$scale==d$scale[1], ]$Entropy
  degree.cols = if (addMid) {
    c("hi1", "hi2", "mid", "low1", "low2")
  } else {
    c("hi1", "hi2", "low1", "low2")
  }

  # Literal-listener matrix: one row per star rating, one column per degree.
  wide = d %>%
    select(stars, degree, speaker.p) %>%
    spread(degree, speaker.p)
  mat = wide[, degree.cols]

  # Optionally renormalize each utterance column to sum to 1.
  if (normalize) {
    for (col in degree.cols) {
      mat[[col]] = mat[[col]] / sum(mat[[col]])
    }
  }

  # Optionally add a generic "None" alternative with all mass on 1 star.
  if (usenone) {
    mat$none = c(1, 0, 0, 0, 0)
  }

  # Pragmatic listener prediction for every (stars, degree) row of d.
  d$pred = round(as.numeric(mapply(norm.post, d$stars, d$degree,
                                    MoreArgs = list(m = mat,
                                                    alpha = alpha,
                                                    useprior = useprior))),
                  digits=4)

  return(d)
}

Tune hyperparams

# tune.alpha()
# ------------
# Grid-search alpha by MSE against pragmatic listener judgments.
# d            --> data
# alphas       --> range of alphas to test
# type         --> "partial" or "full" model
# useprior     --> use uniform prior
# usenone      --> use generic None alternative
# compare.data --> optional data frame supplying listener.p to fit against
tune.alpha = function(d, alphas = seq(from=1, to=10),
                      type="partial", useprior = T,
                      usenone=F, compare.data=NULL, addMid = F, normalize=F) {
  # MSE for each candidate alpha (vapply guarantees a numeric vector)
  fit = vapply(alphas, FUN.VALUE = numeric(1), FUN = function(n) {
    if (type == "partial") {
      md = d %>%
        do(run.partial(., alpha=n, useprior=useprior, usenone=usenone, normalize=normalize))
    } else {
      md = d %>%
        do(run.full(., alpha=n, useprior=useprior, usenone=usenone, addMid=addMid, normalize=normalize))
    }
    # Toggle fit to e6 data
    if (!is.null(compare.data)) {

      # If we're using e11 data with multiple scalars, keep only the items
      # that have a counterpart in the two-term (hi/low) studies.
      if ("hi1" %in% md$degree) {
        matched.items = which((md[, "scale"] != "some_all" &
                                 (md[, "degree"] == "hi2" | md[, "degree"] == "hi1")) |
                                (md[, "scale"] == "some_all" &
                                   (md[, "degree"] == "hi1" | md[, "degree"] == "low1")))
        md = md[matched.items, ]
        md$degree = ifelse(md$degree == "hi1", "hi", "low")
        stopifnot("listener.p" %in% colnames(compare.data))
      }
      md$listener.p = compare.data$listener.p
    }

    # MSE
    return(mean((md$pred - md$listener.p)^2))
  })
  # Return the alpha value with the lowest MSE (first one on ties).
  # BUG FIX: previously returned which(fit == min(fit)) -- the *index* into
  # `alphas` (possibly several on ties), which was only correct because the
  # default grid happens to be 1:10.
  best.alpha = alphas[which.min(fit)]
  return(best.alpha)
}

# Uniform priors
unif.priors = data.frame(stars = seq(1, 5), prior.p = rep(0.2, 5))
# Empirical priors (loaded but unused below unless `priors` is reassigned)
# NOTE(review): absolute user-specific paths -- consider project-relative paths.
emp.priors = read.csv("~/Desktop/Projects/scalar_implicature/models/model_data/emp_priors.csv")
# Global prior table consumed by nn.post()
priors = unif.priors

# Per-scale entropy values (only referenced in commented-out code below)
scales.entropy = read.csv("~/Desktop/Projects/scalar_implicature/models/model_data/scales_entropy.csv")

Model Comparisons

Data for entailment models

# Literal speaker (e8) and pragmatic listener (e6) data for entailment models
speaker = read.csv("~/Desktop/Projects/scalar_implicature/models/model_data/L0_e8.csv")
listener = read.csv("~/Desktop/Projects/scalar_implicature/models/model_data/L1_e6.csv")
# Combine speaker / listener
data.partial = left_join(speaker, listener) %>%
  left_join(priors) %>%
  rowwise %>%
  select(scale, degree, stars, speaker.p, listener.p, prior.p) %>%
  # Ratings missing from the listener data come back NA from the join; treat as 0
  mutate(listener.p = ifelse(is.na(listener.p), 0, listener.p)) %>%
  group_by(scale)

Data for full model - no alternatives

# Literal speaker (e10) and pragmatic listener (e11) data for the full model
speaker = read.csv("~/Desktop/Projects/scalar_implicature/models/model_data/L0_e10.csv")
listener = read.csv("~/Desktop/Projects/scalar_implicature/models/model_data/L1_e11.csv")
data.full = left_join(speaker, listener) %>%
  left_join(priors) %>%
  rowwise %>%
  select(scale, degree, stars, speaker.p, listener.p, prior.p) %>%
  # Ratings missing from the listener data come back NA from the join; treat as 0
  mutate(listener.p = ifelse(is.na(listener.p), 0, listener.p)) %>%
  group_by(scale)

Data for full model - alternatives

# Literal speaker with extra alternatives (e10a); reuses the e11 listener
# data frame loaded above.
speaker = read.csv("~/Desktop/Projects/scalar_implicature/models/model_data/L0_e10a.csv")
data.full.extras = left_join(speaker, listener) %>%
  left_join(priors) %>%
  rowwise %>%
  select(scale, degree, stars, speaker.p, listener.p, prior.p) %>%
  # Ratings missing from the listener data come back NA from the join; treat as 0
  mutate(listener.p = ifelse(is.na(listener.p), 0, listener.p)) %>%
  group_by(scale)
## Joining by: c("scale", "degree", "stars")
## Warning in left_join_impl(x, y, by$x, by$y): joining factors with different
## levels, coercing to character vector
## Joining by: "stars"
## Warning: Grouping rowwise data frame strips rowwise nature

Match items between studies (full and partial)

# Match each full-model item to its two-term (e6) counterpart:
# 'hi1'/'hi2' rows generally, but 'hi1'/'low1' for the some_all scale.
matched.items = which((data.full[, "scale"] != "some_all" &
        (data.full[, "degree"] == "hi2" | data.full[, "degree"] == "hi1")) |
       (data.full[, "scale"] == "some_all" &
          (data.full[, "degree"] == "hi1" | data.full[, "degree"] == "low1")))
# Same matching applied to the extra-alternatives (e10a) data
matched.items.extras = which((data.full.extras[, "scale"] != "some_all" &
        (data.full.extras[, "degree"] == "hi2" | data.full.extras[, "degree"] == "hi1")) |
       (data.full.extras[, "scale"] == "some_all" &
          (data.full.extras[, "degree"] == "hi1" | data.full.extras[, "degree"] == "low1")))

Run comparisons

# Save performance output
# One row per model variant: 4 models x 4 settings (normalize x alpha tuning)
performance.output = data.frame(model=rep(NA, 16),
                                cor.e6=rep(NA, 16),
                                cor.e11=rep(NA, 16),
                                normalized=rep(NA, 16))
# Fixed or tuned alpha per model variant
alphas = rep(NA, 16)
# e11 rows matched to the e6 items, degrees relabeled to the two-term scheme
data.full.transfer = data.full[matched.items, ]
data.full.transfer$degree =
  ifelse(data.full.transfer$degree == "hi1", "hi", "low")
# Model 1 - Entailment only models
# --------------------------------
# 1.a)
# Entailment only
# Normalized = T
# alpha tuning = F
alphas[1] = 1
m1.a = data.partial %>%
  do(run.partial(., alpha = alphas[1], useprior=T, usenone=F, normalize=T))
# Store output and add e11
m1.a = cbind(m1.a, data.full.transfer$listener.p)
colnames(m1.a)[8] = "e11.listener.p"
performance.output[1, ] = c("m1.a", round(cor(m1.a$listener.p, m1.a$pred), 5),
                            round(cor(m1.a$e11.listener.p, m1.a$pred), 5), T)
# Store plot
m1.a.plot = qplot(stars, listener.p, col=degree, 
      data=m1.a,
      main=paste("m1.a\nalpha: ", alphas[1], "\nNormalized = ", T),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# 1.b)
# Entailment only
# Normalized = F
# alpha tune = F
alphas[2] = 1
# BUG FIX: previously passed alphas[1] here; same value today (both 1), but
# it silently ignored alphas[2].
m1.b = data.partial %>%
  do(run.partial(., alpha = alphas[2], useprior=T, usenone=F, normalize=F))
# Store output
m1.b = cbind(m1.b, data.full.transfer$listener.p)
colnames(m1.b)[8] = "e11.listener.p"
# BUG FIX: recorded normalized=T for this unnormalized run; now F.
performance.output[2, ] = c("m1.b", round(cor(m1.b$listener.p, m1.b$pred), 5),
                            round(cor(m1.b$e11.listener.p, m1.b$pred), 5), F)
# Store plot
m1.b.plot = qplot(stars, listener.p, col=degree, 
      data=m1.b,
      main=paste("m1.b\nalpha: ", alphas[2], "\nNormalized = ", F),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# 1.c)
# Entailment only
# Normalized = T
# alpha tune = T
alphas[3] = tune.alpha(data.partial, normalize=T, compare.data=data.full.transfer)
m1.c = data.partial %>%
  do(run.partial(., alpha = alphas[3], useprior=T, usenone=F, normalize=T))
# Store output
m1.c = cbind(m1.c, data.full.transfer$listener.p)
colnames(m1.c)[8] = "e11.listener.p"
performance.output[3, ] = c("m1.c", round(cor(m1.c$listener.p, m1.c$pred), 5),
                            round(cor(m1.c$e11.listener.p, m1.c$pred), 5), T)
# Store plot
m1.c.plot = qplot(stars, listener.p, col=degree, 
      data=m1.c,
      main=paste("m1.c\nalpha: ", alphas[3], "\nNormalized = ", T),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# 1.d)
# Entailment only
# Normalized = F
# alpha tune = T
alphas[4] = tune.alpha(data.partial, normalize=F)
m1.d = data.partial %>%
  do(run.partial(., alpha = alphas[4], useprior=T, usenone=F, normalize=F))
# Store output
m1.d = cbind(m1.d, data.full.transfer$listener.p)
colnames(m1.d)[8] = "e11.listener.p"
# CONSISTENCY FIX: was `normalized=F`, which named one element of the
# character vector unlike every sibling assignment; value unchanged.
performance.output[4, ] = c("m1.d", round(cor(m1.d$listener.p, m1.d$pred), 5),
                            round(cor(m1.d$e11.listener.p, m1.d$pred), 5), F)
# Store plot
m1.d.plot = qplot(stars, listener.p, col=degree, 
      data=m1.d,
      main=paste("m1.d\nalpha: ", alphas[4], "\nNormalized = ", F),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# Model 2 - Entailment + generic none
# --------------------------------
# 2.a)
# Entailment + generic None
# Normalized = T
# alpha tuning = F
alphas[5] = 1
m2.a = data.partial %>%
  do(run.partial(., alpha = alphas[5], useprior=T, usenone=T, normalize=T))
# Store output (column 8 is the matched e11 listener data)
m2.a = cbind(m2.a, data.full.transfer$listener.p)
colnames(m2.a)[8] = "e11.listener.p"
performance.output[5, ] = c("m2.a", round(cor(m2.a$listener.p, m2.a$pred), 5),
                            round(cor(m2.a$e11.listener.p, m2.a$pred), 5), T)
# Store plot
m2.a.plot = qplot(stars, listener.p, col=degree, 
      data=m2.a,
      main=paste("m2.a\nalpha: ", alphas[5], "\nNormalized = ", T),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# 2.b)
# Entailment + generic None
# Normalized = F
# alpha tune = F
alphas[6] = 1
m2.b = data.partial %>%
  do(run.partial(., alpha = alphas[6], useprior=T, usenone=T, normalize=F))
# Store output
m2.b = cbind(m2.b, data.full.transfer$listener.p)
colnames(m2.b)[8] = "e11.listener.p"
performance.output[6, ] = c("m2.b", round(cor(m2.b$listener.p, m2.b$pred), 5),
                            round(cor(m2.b$e11.listener.p, m2.b$pred), 5), F)
# Store plot
m2.b.plot = qplot(stars, listener.p, col=degree, 
      data=m2.b,
      main=paste("m2.b\nalpha: ", alphas[6], "\nNormalized = ", F),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# 2.c)
# Entailment + generic None
# Normalized = T
# alpha tune = T
# compare.data = Toggle this with data.full.transfer for e11 or NULL for e6
alphas[7] = tune.alpha(data.partial, normalize=T, compare.data=NULL)
m2.c = data.partial %>%
  do(run.partial(., alpha = alphas[7], useprior=T, usenone=T, normalize=T))
# Store output
m2.c = cbind(m2.c, data.full.transfer$listener.p)
colnames(m2.c)[8] = "e11.listener.p"
performance.output[7, ] = c("m2.c", round(cor(m2.c$listener.p, m2.c$pred), 5),
                            round(cor(m2.c$e11.listener.p, m2.c$pred), 5), T)
# Store plot
m2.c.plot = qplot(stars, listener.p, col=degree, 
      data=m2.c,
      main=paste("m2.c\nalpha: ", alphas[7], "\nNormalized = ", T),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# 2.d)
# Entailment + generic None
# Normalized = F
# alpha tune = T
# compare.data = Toggle this with data.full.transfer for e11 or NULL for e6
alphas[8] = tune.alpha(data.partial, normalize=F)
m2.d = data.partial %>%
  do(run.partial(., alpha = alphas[8], useprior=T, usenone=T, normalize=F))
# Store output
m2.d = cbind(m2.d, data.full.transfer$listener.p)
colnames(m2.d)[8] = "e11.listener.p"
performance.output[8, ] = c("m2.d", round(cor(m2.d$listener.p, m2.d$pred), 5),
                            round(cor(m2.d$e11.listener.p, m2.d$pred), 5), F)
# Store plot
m2.d.plot = qplot(stars, listener.p, col=degree, 
      data=m2.d,
      main=paste("m2.d\nalpha: ", alphas[8], "\nNormalized = ", F),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# Model 3 - Empirical alternatives
# --------------------------------
# 3.a)
# Emp alts
# Normalized = T
# alpha tuning = F
alphas[9] = 1
m3.a = data.full %>%
  do(run.full(., alpha = alphas[9], addMid=F, normalize=T))
# Match with e6 w/ checks: subset to the hi/low items shared with e6,
# relabel degrees, and attach the e6 pragmatic listener data.
m3.a.matched = m3.a[matched.items, ]
m3.a.matched$degree = ifelse(m3.a.matched$degree == "hi1", "hi", "low")
m3.a.matched = cbind(m3.a.matched, data.partial$listener.p)
# Sanity check: rows must line up with data.partial
all(m3.a.matched$scale == data.partial$scale & m3.a.matched$degree == data.partial$degree)
## [1] TRUE
colnames(m3.a.matched)[length(colnames(m3.a.matched))] = "e6.listener.p"
# Store output (cor.e6 first, then cor.e11 from the native listener.p column)
performance.output[9, ] = c("m3.a", round(cor(m3.a.matched$e6.listener.p, m3.a.matched$pred), 5),
                            round(cor(m3.a.matched$listener.p, m3.a.matched$pred), 5), T)
# Store plot
m3.a.plot = qplot(stars, e6.listener.p, col=degree, 
      data=m3.a.matched,
      main=paste("m3.a\nalpha: ", alphas[9], "\nNormalized = ", T),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# 3.b)
# Emp alts
# Entailment + generic None
# Normalized = F
# alpha tune = F
alphas[10] = 1
m3.b = data.full %>%
  do(run.full(., alpha = alphas[10], addMid=F, normalize=F))
# Match with e6 w/ checks
m3.b.matched = m3.b[matched.items, ]
m3.b.matched$degree = ifelse(m3.b.matched$degree == "hi1", "hi", "low")
m3.b.matched = cbind(m3.b.matched, data.partial$listener.p)
all(m3.b.matched$scale == data.partial$scale & m3.b.matched$degree == data.partial$degree)
## [1] TRUE
colnames(m3.b.matched)[length(colnames(m3.b.matched))] = "e6.listener.p"
# Store output
performance.output[10, ] = c("m3.b", round(cor(m3.b.matched$e6.listener.p, m3.b.matched$pred), 5),
                             round(cor(m3.b.matched$listener.p, m3.b.matched$pred), 5), F)
# Store plot
m3.b.plot = qplot(stars, e6.listener.p, col=degree, 
      data=m3.b.matched,
      main=paste("m3.b\nalpha: ", alphas[10], "\nNormalized = ", F),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# 3.c)
# Emp alts
# Normalized = T
# alpha tune = T
# compare.data = Toggle this with data.partial for e6 or NULL for e11
alphas[11] = tune.alpha(data.full, type = "full", compare.data=data.partial, addMid=F, normalize=T)
m3.c = data.full %>%
  do(run.full(., alpha = alphas[11], addMid=F, normalize=T))
# Match with e6 w/ checks
m3.c.matched = m3.c[matched.items, ]
m3.c.matched$degree = ifelse(m3.c.matched$degree == "hi1", "hi", "low")
m3.c.matched = cbind(m3.c.matched, data.partial$listener.p)
all(m3.c.matched$scale == data.partial$scale & m3.c.matched$degree == data.partial$degree)
## [1] TRUE
colnames(m3.c.matched)[length(colnames(m3.c.matched))] = "e6.listener.p"
# Store output
performance.output[11, ] = c("m3.c", round(cor(m3.c.matched$e6.listener.p, m3.c.matched$pred), 5),
                             round(cor(m3.c.matched$listener.p, m3.c.matched$pred), 5), T)
# Store plot
# NOTE(review): only this plot clamps the y axis with ylim(0, 1) -- confirm
# the asymmetry with the sibling plots is intentional.
m3.c.plot = qplot(stars, e6.listener.p, col=degree, 
      data=m3.c.matched,
      main=paste("m3.c\nalpha: ", alphas[11], "\nNormalized = ", T),
      ylab="Posterior p(rating | word)") +
      ylim(0, 1) +
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# 3.d)
# Emp alts
# Normalized = F
# alpha tune = T
# compare.data = Toggle this with data.partial or NULL
alphas[12] = tune.alpha(data.full, type = "full", compare.data=data.partial, addMid=F, normalize=F)
m3.d = data.full %>%
  do(run.full(., alpha = alphas[12], addMid=F, normalize=F))
# Match with e6 w/ checks
m3.d.matched = m3.d[matched.items, ]
m3.d.matched$degree = ifelse(m3.d.matched$degree == "hi1", "hi", "low")
m3.d.matched = cbind(m3.d.matched, data.partial$listener.p)
all(m3.d.matched$scale == data.partial$scale & m3.d.matched$degree == data.partial$degree)
## [1] TRUE
colnames(m3.d.matched)[length(colnames(m3.d.matched))] = "e6.listener.p"
# Store output
performance.output[12, ] = c("m3.d", round(cor(m3.d.matched$e6.listener.p, m3.d.matched$pred), 5),
                             round(cor(m3.d.matched$listener.p, m3.d.matched$pred), 5), F)
# Store plot
m3.d.plot = qplot(stars, e6.listener.p, col=degree, 
      data=m3.d.matched,
      main=paste("m3.d\nalpha: ", alphas[12], "\nNormalized = ", F),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)


# Model 4 - Empirical alternatives + extras
# -----------------------------------------
# 4.a)
# Emp alts + extras
# Normalized = T
# alpha tuning = F
alphas[13] = 1
m4.a = data.full.extras %>%
  do(run.full(., alpha = alphas[13], addMid=T, normalize=T))
# Match with e6 w/ checks: subset to the hi/low items shared with e6,
# relabel degrees, and attach the e6 pragmatic listener data.
m4.a.matched = m4.a[matched.items.extras, ]
m4.a.matched$degree = ifelse(m4.a.matched$degree == "hi1", "hi", "low")
m4.a.matched = cbind(m4.a.matched, data.partial$listener.p)
all(m4.a.matched$scale == data.partial$scale & m4.a.matched$degree == data.partial$degree)
## [1] TRUE
colnames(m4.a.matched)[length(colnames(m4.a.matched))] = "e6.listener.p"
# Store output
performance.output[13, ] = c("m4.a", round(cor(m4.a.matched$e6.listener.p, m4.a.matched$pred), 5), 
                             round(cor(m4.a.matched$listener.p, m4.a.matched$pred), 5), T)
# Store plot
m4.a.plot = qplot(stars, e6.listener.p, col=degree, 
      data=m4.a.matched,
      main=paste("m4.a\nalpha: ", alphas[13], "\nNormalized = ", T),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# 4.b)
# Emp alts + extras
# Normalized = F
# alpha tune = F
alphas[14] = 1
m4.b = data.full.extras %>%
  do(run.full(., alpha = alphas[14], addMid=T, normalize=F))
# Match with e6 w/ checks
m4.b.matched = m4.b[matched.items.extras, ]
m4.b.matched$degree = ifelse(m4.b.matched$degree == "hi1", "hi", "low")
m4.b.matched = cbind(m4.b.matched, data.partial$listener.p)
all(m4.b.matched$scale == data.partial$scale & m4.b.matched$degree == data.partial$degree)
## [1] TRUE
colnames(m4.b.matched)[length(colnames(m4.b.matched))] = "e6.listener.p"
# Store output
performance.output[14, ] = c("m4.b", round(cor(m4.b.matched$e6.listener.p, m4.b.matched$pred), 5), 
                             round(cor(m4.b.matched$listener.p, m4.b.matched$pred), 5), F)
# Store plot
m4.b.plot = qplot(stars, e6.listener.p, col=degree, 
      data=m4.b.matched,
      main=paste("m4.b\nalpha: ", alphas[14], "\nNormalized = ", F),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# 4.c)
# Emp alts + extras
# Normalized = T
# alpha tune = T
# compare.data = Toggle this with data.partial or NULL
alphas[15] = tune.alpha(data.full.extras, type = "full", compare.data=data.partial, addMid=T, normalize=T)
m4.c = data.full.extras %>%
  do(run.full(., alpha = alphas[15], addMid=T, normalize=T))
# Match with e6 w/ checks
m4.c.matched = m4.c[matched.items.extras, ]
m4.c.matched$degree = ifelse(m4.c.matched$degree == "hi1", "hi", "low")
m4.c.matched = cbind(m4.c.matched, data.partial$listener.p)
all(m4.c.matched$scale == data.partial$scale & m4.c.matched$degree == data.partial$degree)
## [1] TRUE
colnames(m4.c.matched)[length(colnames(m4.c.matched))] = "e6.listener.p"
# Store output (e11 and e6)
performance.output[15, ] = c("m4.c", round(cor(m4.c.matched$e6.listener.p, m4.c.matched$pred), 5),
                             round(cor(m4.c.matched$listener.p, m4.c.matched$pred), 5), T)

# Store plot
# CONSISTENCY FIX: this plot used listener.p (e11) where every sibling
# matched-data plot shows e6.listener.p; now plots e6.listener.p.
m4.c.plot = qplot(stars, e6.listener.p, col=degree, 
      data=m4.c.matched,
      main=paste("m4.c\nalpha: ", alphas[15], "\nNormalized = ", T),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# 4.d)
# Emp alts + extras
# Normalized = F
# alpha tune = T
# compare.data = Toggle this with data.partial or NULL
alphas[16] = tune.alpha(data.full.extras, type = "full", compare.data=data.partial, addMid=T, normalize=F)
m4.d = data.full.extras %>%
  do(run.full(., alpha = alphas[16], addMid=T, normalize=F))
# Match with e6 w/ checks
m4.d.matched = m4.d[matched.items.extras, ]
m4.d.matched$degree = ifelse(m4.d.matched$degree == "hi1", "hi", "low")
m4.d.matched = cbind(m4.d.matched, data.partial$listener.p)
all(m4.d.matched$scale == data.partial$scale & m4.d.matched$degree == data.partial$degree)
## [1] TRUE
colnames(m4.d.matched)[length(colnames(m4.d.matched))] = "e6.listener.p"
# Store output
performance.output[16, ] = c("m4.d", round(cor(m4.d.matched$e6.listener.p, m4.d.matched$pred), 5),
                             round(cor(m4.d.matched$listener.p, m4.d.matched$pred), 5), F)
# Store plot
m4.d.plot = qplot(stars, e6.listener.p, col=degree, 
      data=m4.d.matched,
      main=paste("m4.d\nalpha: ", alphas[16], "\nNormalized = ", F),
      ylab="Posterior p(rating | word)") + 
  facet_wrap(~scale) + 
  geom_line(aes(y = pred), lty = 4)

# -- ^^^^^^^^^^---- ^^^^^^^^^^---- ^^^^^^^^^^---- ^^^^^^^^^^---- ^^^^^^^^^^--
# -- ^^^^^^^^^^---- ^^^^^^^^^^---- ^^^^^^^^^^---- ^^^^^^^^^^---- ^^^^^^^^^^--
# -- ^^^^^^^^^^---- ^^^^^^^^^^---- ^^^^^^^^^^---- ^^^^^^^^^^---- ^^^^^^^^^^--
# -- ^^^^^^^^^^---- ^^^^^^^^^^---- ^^^^^^^^^^---- ^^^^^^^^^^---- ^^^^^^^^^^--

# Model 3 - Full, alpha tuning
# ----------------------------
# alphas[6] = tune.alpha(data.full, type = "full", compare.data=data.partial, addMid=F, normalize=F)
# m3.fit = data.full %>%
#   do(run.full(., alpha = alphas[6]))
# m3.fit.matched = m3.fit[matched.items, ]
# m3.fit.matched$degree = ifelse(m3.fit.matched$degree == "hi1", "hi", "low")
# m3.fit.matched = cbind(m3.fit.matched, data.partial$listener.p)
# # Check that everything lines up between partial and full
# all(m3.fit.matched$scale == data.partial$scale & m3.fit.matched$degree == data.partial$degree)
# colnames(m3.fit.matched)[length(colnames(m3.fit.matched))] = "e6.listener.p"
# performance.output[6, ] = c("M3_fit", round(cor(m3.fit.matched$e6.listener.p, m3.fit.matched$pred), 5))
# 
# # Model 4 - Full, no alpha tuning
# # -------------------------------
# alphas[7] = 1
# m4.noFit = data.full.extras %>%
#   do(run.full(., alpha = alphas[7], addMid=T, normalize=T))
# m4.noFit.matched = m4.noFit[matched.items.extras, ]
# # Need to fix this to dealt with 'mid'
# m4.noFit.matched$degree = ifelse(m4.noFit.matched$degree == "hi1", "hi", "low")
# m4.noFit.matched = cbind(m4.noFit.matched, data.partial$listener.p)
# # Check that everything lines up between partial and full
# all(m4.noFit.matched$scale == data.partial$scale & m4.noFit.matched$degree == data.partial$degree)
# colnames(m4.noFit.matched)[length(colnames(m4.noFit.matched))] = "e6.listener.p"
# # store corr
# performance.output[7, ] = c("M4_noFit", round(cor(m4.noFit.matched$e6.listener.p, m4.noFit.matched$pred), 5))
# 
# # Model 4 - Full, alpha tuning, added 'mid'
# # ----------------------------
# alphas[8] = tune.alpha(data.full.extras, type = "full", compare.data=data.partial, addMid=T)
# m4.fit = data.full.extras %>%
#   do(run.full(., alpha = alphas[8], addMid=T))
# m4.fit.matched = m4.fit[matched.items.extras, ]
# m4.fit.matched$degree = ifelse(m4.fit.matched$degree == "hi1", "hi", "low")
# m4.fit.matched = cbind(m4.fit.matched, data.partial$listener.p)
# # Check that everything lines up between partial and full
# all(m4.fit.matched$scale == data.partial$scale & m4.fit.matched$degree == data.partial$degree)
# colnames(m4.fit.matched)[length(colnames(m4.fit.matched))] = "e6.listener.p"
# # store corr
# performance.output[8, ] = c("M4_fit", round(cor(m4.fit.matched$e6.listener.p, m4.fit.matched$pred), 5))

Plots

Overall model performance: \(r^2\). M1 and M2 use literal listener values from exp8. M2 includes a generic “None” defined in terms of stars (1: 1.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 0.0). M3 uses the full set of literal listener alternatives from exp10; however, the \(r^2\) values and tuning reflect comparisons to exp6 pragmatic listener judgments (not exp11).

# Append the fixed/tuned alphas and render the summary table (gridExtra)
performance.output = cbind(performance.output, alphas)
grid.table(performance.output)

# Render every model-variant plot (one figure per chunk in the knitted doc)
m1.a.plot

m1.b.plot

m1.c.plot

m1.d.plot

m2.a.plot

m2.b.plot

m2.c.plot

m2.d.plot

m3.a.plot

m3.b.plot

m3.c.plot

m3.d.plot

m4.a.plot

m4.b.plot

m4.c.plot

m4.d.plot

# performance.output$model = factor(performance.output$model, levels=c(
#   "M1_noFit", "M1_fit", "M2_noFit", "M2_fit", "M3_noFit", "M3_fit", "M4_noFit", "M4_fit"
# ))
# # Populate extra model info
# performance.output$cor = as.numeric(performance.output$cor)
# performance.output$alphas = alphas
# performance.output$alts = c(rep("entailment only", 2), rep("entailment + None", 2), rep("full", 2), rep("full + extras", 2))
# grid.table(performance.output)
# 
# qplot(data=performance.output, x=model, y=performance.output$cor,
#       geom="bar", stat="identity",
#       main = "Model performance", ylab="r^2", xlab="model") +
#   geom_text(aes(label = round(cor, 3), y = cor + 0.02), size = 3) +
#   scale_y_continuous(limits=c(0, 1))

Individual model performance

# m1.noFit.G = qplot(stars, listener.p, col=degree, 
#       data=m1.noFit,
#       main=paste("M1, no fit\nalpha: ", alphas[1]),
#       ylab="Posterior p(rating | word)") + 
#   facet_wrap(~scale) + 
#   geom_line(aes(y = pred), lty = 4)
# # m1.noFit.G
# 
# m1.fit.G = qplot(stars, listener.p, col=degree, 
#       data=m1.fit,
#       main=paste("M1, fit\nalpha: ", alphas[2]),
#       ylab="Posterior p(rating | word)") + 
#   facet_wrap(~scale) + 
#   geom_line(aes(y = pred), lty = 4)
# m1.fit.G
# 
# m2.noFit.G = qplot(stars, listener.p, col=degree, 
#       data=m2.noFit,
#       main=paste("M2, no fit\nalpha: ", alphas[3]),
#       ylab="Posterior p(rating | word)") + 
#   facet_wrap(~scale) + 
#   geom_line(aes(y = pred), lty = 4)
# m2.noFit.G
# 
# m2.fit.G = qplot(stars, listener.p, col=degree, 
#       data=m2.fit,
#       main=paste("M2, fit\nalpha: ", alphas[4]),
#       ylab="Posterior p(rating | word)") + 
#   facet_wrap(~scale) + 
#   geom_line(aes(y = pred), lty = 4)
# m2.fit.G
# 
# m3.matched.noFit.G = qplot(stars, e6.listener.p, col=degree, 
#       data=m3.noFit.matched,
#       main=paste("M3, no fit\nalpha: ", alphas[5]),
#       ylab="Posterior p(rating | word)") + 
#   facet_wrap(~scale) + 
#   geom_line(aes(y = pred), lty = 4)
# m3.matched.noFit.G
# cor(m3.noFit.matched$e6.listener.p, m3.noFit.matched$pred)
# 
# m3.matched.fit.G = qplot(stars, e6.listener.p, col=degree, 
#       data=m3.fit.matched,
#       main=paste("M3, fit\nalpha: ", alphas[6]),
#       ylab="Posterior p(rating | word)") + 
#   facet_wrap(~scale) + 
#   geom_line(aes(y = pred), lty = 4)
# m3.matched.fit.G
# cor(m3.fit.matched$e6.listener.p, m3.fit.matched$pred)
# 
# # Full model fitted - color scale
# m3.matched.fit.CorPlot = ggplot(data=m3.fit.matched, aes(y=pred, x=e6.listener.p)) +
#   geom_point(aes(colour = scale)) +
#   geom_smooth(method=lm) +
#   ggtitle("Model performance: M3 fit") +
#   labs(x = "human judgments", y="model prediction")
# m3.matched.fit.CorPlot
# 
# # Full model fitted - color stars
# m3.matched.fit.CorPlot = ggplot(data=m3.fit.matched, aes(y=pred, x=e6.listener.p)) +
#   geom_point(aes(colour = stars)) +
#   geom_smooth(method=lm) +
#   ggtitle("Model performance: M3 fit") +
#   labs(x = "human judgments", y="model prediction")
# m3.matched.fit.CorPlot
# 
# m4.matched.noFit.G = qplot(stars, e6.listener.p, col=degree, 
#       data=m4.noFit.matched,
#       main=paste("M4, no fit\nalpha: ", alphas[7]),
#       ylab="Posterior p(rating | word)") + 
#   facet_wrap(~scale) + 
#   geom_line(aes(y = pred), lty = 4)
# m4.matched.noFit.G
# 
# m4.matched.fit.G = qplot(stars, e6.listener.p, col=degree, 
#       data=m4.fit.matched,
#       main=paste("M4, fit\nalpha: ", alphas[8]),
#       ylab="Posterior p(rating | word)") + 
#   facet_wrap(~scale) + 
#   geom_line(aes(y = pred), lty = 4)
# m4.matched.fit.G

Other exploration

# bad.predictions = m3.fit.matched$pred > 0 &
#                   m3.fit.matched$e6.listener.p == 0 &
#                   (m3.fit.matched$pred - m3.fit.matched$e6.listener.p > 0.05)